Data science Venn Diagram.
2/14/2020
Data science Venn Diagram.
Water can form high- (left) and low-density amorphous ices at liquid-nitrogen temperatures. Researchers want to determine whether water can also form two distinct liquid phases at low temperature. Credit: Osamu Mishima
“When a country owes more than 90 percent of its GDP, it slides into recession.”
Imagine we have the following data:
| date | lab_test | temp | infections |
|---|---|---|---|
| 2020-02-01 | 7 | 3.0 | 17 |
| 2020-02-02 | 7 | 6.9 | 14 |
| 2020-02-03 | 7 | 14.3 | 14 |
| 2020-02-04 | 4 | 11.5 | 14 |
| 2020-02-05 | 4 | 2.9 | 12 |
# Skeleton of the validation pipeline. Every stage is currently an identity
# step that hands its input on unchanged, marking where a real check will go.
process_data <- function(data) {
  # stand-in for a check that has not been written yet
  pass_through <- function(x) {
    x
  }

  data %>%
    pass_through() %>% # to do: verify all columns are present
    pass_through() %>% # to do: verify numbers are non-negative
    pass_through() # to do: verify all dates are unique
}
# Validation pipeline, stage one implemented: the column-presence check is
# real, the two remaining stages are still identity placeholders.
process_data <- function(data) {
  # stand-in for a check that has not been written yet
  pass_through <- function(x) {
    x
  }

  # all expected columns must be present; `just_warn` is the error handler
  columns_checked <- data %>%
    verify(
      has_all_names("date", "lab_test", "temp", "infections"),
      error_fun = just_warn
    )

  columns_checked %>%
    pass_through() %>% # to do: verify numbers are non-negative
    pass_through() # to do: verify all dates are unique
}
# Validation pipeline, stages one and two implemented: column presence and
# non-negative values are checked, the uniqueness stage is still a placeholder.
process_data <- function(data) {
  # all expected columns must be present
  columns_checked <- data %>%
    verify(
      has_all_names("date", "lab_test", "temp", "infections"),
      error_fun = just_warn
    )

  # the numeric columns must lie within [0, Inf)
  bounds_checked <- columns_checked %>%
    assert(
      within_bounds(0, Inf),
      c("infections", "temp", "lab_test"),
      error_fun = just_warn
    )

  # to do: verify all dates are unique (identity step for now)
  bounds_checked %>%
    (function(x) {
      x
    })
}
#' Validate the infection data with three assertr checks
#'
#' The checks run in sequence: required columns, non-negative numeric
#' columns, unique dates. Each check is given `just_warn` as its error
#' function.
#'
#' @param data Table expected to hold the columns `date`, `lab_test`, `temp`
#'   and `infections`.
#' @return The result of the last check in the chain.
process_data <- function(data) {
  # 1. all expected columns are present
  columns_checked <- data %>%
    verify(
      has_all_names("date", "lab_test", "temp", "infections"),
      error_fun = just_warn
    )

  # 2. the numeric columns lie within [0, Inf)
  bounds_checked <- columns_checked %>%
    assert(
      within_bounds(0, Inf),
      c("infections", "temp", "lab_test"),
      error_fun = just_warn
    )

  # 3. no date appears more than once
  bounds_checked %>%
    assert(is_uniq, date, error_fun = just_warn)
}
# Show the first five rows of the validated data.
head(process_data(data), n = 5)
## # A tibble: 5 x 4
##   date       lab_test  temp infections
##   <chr>         <dbl> <dbl>      <dbl>
## 1 2020-02-01        7   3           17
## 2 2020-02-02        7   6.9         14
## 3 2020-02-03        7  14.3         14
## 4 2020-02-04        4  11.5         14
## 5 2020-02-05        4   2.9         12
# Append a row whose date repeats an existing one, then validate the result.
cleaned_data <- process_data(
  add_row(
    data,
    date = "2020-02-01",
    lab_test = 1,
    temp = 20,
    infections = 17
  )
)
## Column 'date' violates assertion 'is_uniq' 2 times
##     verb redux_fn predicate column index      value
## 1 assert       NA   is_uniq   date     1 2020-02-01
## 2 assert       NA   is_uniq   date    21 2020-02-01
## Warning: assertr encountered errors
# Append a row with a negative lab_test value, then validate the result.
cleaned_data <- process_data(
  add_row(
    data,
    date = "2020-02-21",
    lab_test = -1,
    temp = 20,
    infections = 17
  )
)
## Column 'lab_test' violates assertion 'within_bounds(0, Inf)' 1 time
##     verb redux_fn             predicate   column index value
## 1 assert       NA within_bounds(0, Inf) lab_test    21    -1
## Warning: assertr encountered errors
# The validated data should have the base type "list".
test_that("process data", {
  processed <- process_data(data)
  expect_type(processed, "list")
})
# Validating data with a repeated date should raise a warning.
test_that("date not unique", {
  expect_warning(
    process_data(
      add_row(
        data,
        date = "2020-02-20",
        lab_test = 1,
        temp = 20,
        infections = 17
      )
    )
  )
})
## Column 'date' violates assertion 'is_uniq' 2 times
##     verb redux_fn predicate column index      value
## 1 assert       NA   is_uniq   date    20 2020-02-20
## 2 assert       NA   is_uniq   date    21 2020-02-20
# Validating data with a negative lab_test value should raise a warning.
test_that("Negative numbers", {
  expect_warning(
    process_data(
      add_row(
        data,
        date = "2020-02-21",
        lab_test = -1,
        temp = 20,
        infections = 17
      )
    )
  )
})
## Column 'lab_test' violates assertion 'within_bounds(0, Inf)' 1 time
##     verb redux_fn             predicate   column index value
## 1 assert       NA within_bounds(0, Inf) lab_test    21    -1
# Run every test file found in the example project's tests directory.
testthat::test_dir(path = "example_code/tests")
## v |  OK F W S | Context
## / |   0       | test_process_data
## Column 'date' violates assertion 'is_uniq' 2 times
##     verb redux_fn predicate column index      value
## 1 assert       NA   is_uniq   date    20 2020-02-20
## 2 assert       NA   is_uniq   date    21 2020-02-20
##
## Column 'lab_test' violates assertion 'within_bounds(0, Inf)' 1 time
##     verb redux_fn             predicate   column index value
## 1 assert       NA within_bounds(0, Inf) lab_test    21    -1
##
## v |   3       | test_process_data
##
## == Results =======================================================================================
## OK:       3
## Failed:   0
## Warnings: 0
## Skipped:  0
##
## You rock!
// Expectation written in the expect(...).toBe(...) matcher style
// (presumably Jest or a similar framework; confirm): it checks that
// umbrellaOpens is true.
expect(umbrellaOpens).toBe(true)
tests: 1 passed, 1 total